In [1]:
import os
import pandas as pd
import numpy as np
import torch
import dill as pickle

import statistics
import math
import plotly
import plotly.graph_objs as go
import plotly.express as px

from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

import bamt.Networks as Nets
import bamt.Nodes as Nodes
import bamt.Preprocessors as pp
from pgmpy.estimators import K2Score

from tedeous.solver import grid_format_prepare

from IPython.core.display import SVG
from IPython import display
import warnings
warnings.filterwarnings('ignore')
C:\Users\user\anaconda3\envs\article\lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

The Lotka-Volterra equations (Predator-Prey model)

\begin{equation*} \begin{cases} \Large\frac{\partial u}{\partial t} = \normalsize(\alpha - \beta v)u, \\ \Large\frac{\partial v}{\partial t} = \normalsize (-\gamma + \delta u)v. \end{cases} \end{equation*}

$u(t)$ - a function representing the population of prey;
$v(t)$ - a function representing the population of predators;

$\alpha$ - the birth rate coefficient of prey;
$\beta$ - the predation rate coefficient of prey;
$\gamma $ - the mortality rate coefficient of predators;
$\delta$ - the reproductive capacity coefficient.

Source: https://github.com/stan-dev/example-models/blob/master/knitr/lotka-volterra/hudson-bay-lynx-hare.csv

Data: Annual information on the populations of lynxes and hares from 1900 to 1920 (21 rows)

In [2]:
# Load the Hudson Bay lynx-hare dataset; the ', ' separator requires the
# python engine. Assumes column order Year, Lynx, Hare — TODO confirm
# against the CSV header.
source = pd.read_csv('hudson-bay-lynx-hare.csv', sep = ', ', engine = 'python').values
t = source[:, 0]
x = source[:, 1] # Lynx/Hunters
y = source[:, 2] # Hare/Prey
source_data = [y, x]  # ordered as [prey u, predator v]
In [3]:
# Visualize the raw populations: hare (prey, u) and lynx (predator, v).
fig = go.Figure()
for series, label, color in [(y, 'u (Hare)', 'firebrick'), (x, 'v (Lynx)', 'black')]:
    fig.add_trace(go.Scatter(x=t, y=series, name=f'Source data - {label}', line=dict(color=color, width=2)))
fig.update_layout(xaxis_title="Time, t [Years]", yaxis_title="Population")
fig.show()

The application of the multi-objective evolutionary optimization algorithm enables the discovery of a Pareto-optimal set of equations that do not dominate each other. Upon multiple runs of the algorithm on the same data, the obtained equations and their coefficients are collected. The structure of the table is presented in the figure below.

In [4]:
SVG(filename='info.svg')
Out[4]:
$$term_{1\_...
$$term_{2\_...
$$......
$$term_{1\_r\_u...
$$......
$$term_{1\_...
$$term_{2\_...
$$......
$$term_{1\_r\_v...
$$......
$$coeff_{11...
$$coeff_{12...
$$......
$$-1$$
$$......
$$0$$
$$coeff_{12...
$$......
$$-1$$
$$......
$$...$$
$$......
$$...$$
$$\_...
$$\_...
Characteristics of the absence of a term in the derived equation
Characteristics of the absence of a t...

Characteristics of a term in the right-hand side of an equation

Characteristics of a te...
Membership of a term in an equation for a certain function
Membership of a term in an...
System of equations and coefficients of the terms
System of equations and...
Text is not SVG - cannot display

The columns in the table represent the terms obtained during the discovery of partial differential equations. Each entry/row contains the coefficients for each term.

This structure is necessary for constructing a joint distribution to determine which terms occur together most frequently. Based on this data, a Bayesian network will be constructed, allowing for the analysis of interdependencies and relationships among the terms in the system.

1. Preprocessing¶

Preprocessing includes the removal of zero columns/rows, analysis of the frequency of occurrence of specific terms, and assessment of the diversity among the obtained equations.

In [5]:
# Directory with the evolutionary-search results: a tab-separated table of
# term coefficients, one row per discovered system of equations.
path = 'numerical_results/'
df = pd.read_csv(f'{path}output_main_hunter_prey_lynx_hare.csv', index_col = 'Unnamed: 0', sep='\t', encoding='utf-8')
df.shape
Out[5]:
(211, 21)
In [6]:
df.head()
Out[6]:
C_u v{power: 1.0} * u{power: 1.0}_u u{power: 1.0}_u v{power: 1.0}_u v{power: 1.0} * dv/dx1{power: 1.0}_u u{power: 1.0} * du/dx1{power: 1.0}_u dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * v{power: 1.0}_u du/dx1{power: 1.0}_r_u ... v{power: 1.0}_v v{power: 1.0} * u{power: 1.0}_v u{power: 1.0}_v dv/dx1{power: 1.0} * v{power: 1.0}_v dv/dx1{power: 1.0} * u{power: 1.0}_v du/dx1{power: 1.0}_v dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v du/dx1{power: 1.0} * u{power: 1.0}_v dv/dx1{power: 1.0}_r_v dv/dx1{power: 1.0} * u{power: 1.0}_r_v
0 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1.0 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0
1 17.890508 0.000000 0.000000 -0.906833 -0.011958 0.0 0.0 0.0 0.0 -1.0 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0
2 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1.0 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0
3 17.890508 0.000000 0.000000 -0.906833 -0.011958 0.0 0.0 0.0 0.0 -1.0 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0
4 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1.0 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0

5 rows × 21 columns

In [7]:
# Suppress floating-point noise from the CSV round-trip by rounding every
# coefficient to 10 decimal places. A single vectorized DataFrame.round call
# replaces the per-column loop (all columns are float64 at this point).
df = df.round(decimals=10)
In [8]:
# Deleting rows if all coefficients on the left side are zero and the coefficient on the right side is -1 (based on the sum of row elements).
# NOTE(review): the column filter `df.sum(axis=0) != 0` would also drop a
# column whose non-zero entries happen to cancel out exactly — confirm this
# is intended (the all-zero-column removal below would already cover pure
# zero columns).
df = df.loc[(df.sum(axis=1) != -2), (df.sum(axis=0) != 0)] # for system
# Removing zero columns
df = df.loc[:, (df != 0).any(axis=0)]
# Count of nonzero values in columns
(df != 0).sum(axis = 0)
Out[8]:
C_u                                          211
v{power: 1.0} * u{power: 1.0}_u              164
u{power: 1.0}_u                              162
v{power: 1.0}_u                               40
v{power: 1.0} * dv/dx1{power: 1.0}_u           7
u{power: 1.0} * du/dx1{power: 1.0}_u          29
dv/dx1{power: 1.0}_u                           9
du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u      6
du/dx1{power: 1.0} * v{power: 1.0}_u           5
du/dx1{power: 1.0}_r_u                       211
C_v                                          211
v{power: 1.0}_v                              159
v{power: 1.0} * u{power: 1.0}_v              155
u{power: 1.0}_v                               46
dv/dx1{power: 1.0} * v{power: 1.0}_v          37
dv/dx1{power: 1.0} * u{power: 1.0}_v           8
du/dx1{power: 1.0}_v                           7
dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v      8
du/dx1{power: 1.0} * u{power: 1.0}_v           2
dv/dx1{power: 1.0}_r_v                       210
dv/dx1{power: 1.0} * u{power: 1.0}_r_v         1
dtype: int64
In [9]:
df_initial = df.copy() # The initial data is used for learning the Bayesian network
In [10]:
df.shape
Out[10]:
(211, 21)
In [11]:
# Right-hand-side marker columns ('_r' in the name) hold the values -1/0;
# cast them to int and wrap as strings so they are treated as discrete
# categories during Bayesian-network learning. A single two-step cast over
# all marker columns replaces the per-column astype loop.
r_cols = [col for col in df.columns if '_r' in col]
df[r_cols] = df[r_cols].astype("int64").astype("str")
In [12]:
df.head()
Out[12]:
C_u v{power: 1.0} * u{power: 1.0}_u u{power: 1.0}_u v{power: 1.0}_u v{power: 1.0} * dv/dx1{power: 1.0}_u u{power: 1.0} * du/dx1{power: 1.0}_u dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * v{power: 1.0}_u du/dx1{power: 1.0}_r_u ... v{power: 1.0}_v v{power: 1.0} * u{power: 1.0}_v u{power: 1.0}_v dv/dx1{power: 1.0} * v{power: 1.0}_v dv/dx1{power: 1.0} * u{power: 1.0}_v du/dx1{power: 1.0}_v dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v du/dx1{power: 1.0} * u{power: 1.0}_v dv/dx1{power: 1.0}_r_v dv/dx1{power: 1.0} * u{power: 1.0}_r_v
0 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1 0
1 17.890508 0.000000 0.000000 -0.906833 -0.011958 0.0 0.0 0.0 0.0 -1 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1 0
2 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1 0
3 17.890508 0.000000 0.000000 -0.906833 -0.011958 0.0 0.0 0.0 0.0 -1 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1 0
4 0.065667 -0.027988 0.546189 0.000000 0.000000 0.0 0.0 0.0 0.0 -1 ... -0.837908 0.026033 0.0 0.0 0.0 0.0 0.0 0.0 -1 0

5 rows × 21 columns

In [13]:
df.groupby(df.columns.tolist(),as_index=False).size()  # The number of diverse obtained systems
Out[13]:
C_u v{power: 1.0} * u{power: 1.0}_u u{power: 1.0}_u v{power: 1.0}_u v{power: 1.0} * dv/dx1{power: 1.0}_u u{power: 1.0} * du/dx1{power: 1.0}_u dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * v{power: 1.0}_u du/dx1{power: 1.0}_r_u ... v{power: 1.0} * u{power: 1.0}_v u{power: 1.0}_v dv/dx1{power: 1.0} * v{power: 1.0}_v dv/dx1{power: 1.0} * u{power: 1.0}_v du/dx1{power: 1.0}_v dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v du/dx1{power: 1.0} * u{power: 1.0}_v dv/dx1{power: 1.0}_r_v dv/dx1{power: 1.0} * u{power: 1.0}_r_v size
0 -0.719965 0.000000 0.008748 0.000000 0.000000 0.018813 0.000000 0.00000 0.000000 -1 ... 0.000000 0.253330 0.018645 0.000000 0.000000 0.000000 0.000000 -1 0 1
1 -0.388341 0.000000 0.000000 0.000000 0.000000 0.019710 0.154030 0.00000 0.000000 -1 ... 0.000000 0.430548 0.000000 0.000000 0.000000 -0.019533 0.000000 -1 0 1
2 -0.388341 0.000000 0.000000 0.000000 0.000000 0.019710 0.154030 0.00000 0.000000 -1 ... 0.000000 0.253330 0.018645 0.000000 0.000000 0.000000 0.000000 -1 0 1
3 -0.388341 0.000000 0.000000 0.000000 0.000000 0.019710 0.154030 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 2
4 -0.379967 0.000000 0.000000 0.000000 0.000000 0.019675 0.154793 0.00000 0.000000 -1 ... 0.026033 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 1
5 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 25.751746 0.000000 0.000000 0.000000 0.000000 0.000000 0 -1 1
6 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.523036 0.000000 0.000000 0.000000 0.000000 -0.005488 -1 0 2
7 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.430548 0.000000 0.000000 0.000000 -0.019533 0.000000 -1 0 7
8 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.183901 0.000000 0.013403 0.000000 0.000000 0.000000 -1 0 6
9 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.253330 0.018645 0.000000 0.000000 0.000000 0.000000 -1 0 18
10 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.000000 0.000000 0.018619 0.000000 0.000000 0.000000 -1 0 1
11 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.000000 0.026424 0.000000 0.000000 0.000000 0.000000 -1 0 2
12 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 64
13 -0.142318 -0.028009 0.562570 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.000000 0.027226 0.000000 0.069275 0.000000 0.000000 -1 0 4
14 0.065667 -0.027988 0.546189 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.253993 0.018674 0.000000 0.000000 0.000000 0.000000 -1 0 5
15 0.065667 -0.027988 0.546189 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.000000 0.000000 0.018975 0.067130 0.000000 0.000000 -1 0 1
16 0.065667 -0.027988 0.546189 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.026033 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 47
17 0.065667 -0.027988 0.546189 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.000000 0.000000 0.027006 0.000000 0.064849 0.000000 0.000000 -1 0 2
18 2.928678 0.000000 0.103895 0.000000 0.000000 0.000000 0.000000 0.00000 0.025214 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 1
19 6.248662 0.000000 0.000000 0.000000 0.000000 0.000000 0.076275 0.00000 0.025155 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 1
20 8.618788 0.000000 0.000000 -0.439853 0.000000 0.012617 0.000000 0.00000 0.000000 -1 ... 0.000000 0.253993 0.018674 0.000000 0.000000 0.000000 0.000000 -1 0 1
21 8.618788 0.000000 0.000000 -0.439853 0.000000 0.012617 0.000000 0.00000 0.000000 -1 ... 0.026033 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 4
22 8.768071 0.000000 0.000000 -0.442624 0.000000 0.012593 0.000000 0.00000 0.000000 -1 ... 0.000000 0.253330 0.018645 0.000000 0.000000 0.000000 0.000000 -1 0 3
23 8.768071 0.000000 0.000000 -0.442624 0.000000 0.012593 0.000000 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 15
24 11.653214 0.000000 0.000000 -0.376289 0.000000 0.000000 0.000000 0.00000 0.016304 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 3
25 16.786657 0.000000 0.000000 -0.785150 0.000000 0.000000 0.000000 0.02145 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 6
26 17.890508 0.000000 0.000000 -0.906833 -0.011958 0.000000 0.000000 0.00000 0.000000 -1 ... 0.026033 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 4
27 18.056171 -0.028170 0.000000 0.000000 0.000000 0.000000 0.797993 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 3
28 18.094896 -0.007885 0.000000 -0.653176 0.000000 0.000000 0.000000 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 1
29 18.113642 0.000000 0.000000 -0.908095 -0.011720 0.000000 0.000000 0.00000 0.000000 -1 ... 0.025532 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1 0 3

30 rows × 22 columns

In [14]:
# Share of distinct systems among all runs of the evolutionary algorithm.
all_r = len(df)
unique_r = len(df.groupby(df.columns.tolist(), as_index=False).size())

print(f'Out of {all_r} obtained systems, \033[1m{unique_r} are unique\033[0m ({int(unique_r / all_r * 100)} %)')
Out of 211 obtained systems, 30 are unique (14 %)
In [15]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 211 entries, 0 to 210
Data columns (total 21 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   C_u                                        211 non-null    float64
 1   v{power: 1.0} * u{power: 1.0}_u            211 non-null    float64
 2   u{power: 1.0}_u                            211 non-null    float64
 3   v{power: 1.0}_u                            211 non-null    float64
 4   v{power: 1.0} * dv/dx1{power: 1.0}_u       211 non-null    float64
 5   u{power: 1.0} * du/dx1{power: 1.0}_u       211 non-null    float64
 6   dv/dx1{power: 1.0}_u                       211 non-null    float64
 7   du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u  211 non-null    float64
 8   du/dx1{power: 1.0} * v{power: 1.0}_u       211 non-null    float64
 9   du/dx1{power: 1.0}_r_u                     211 non-null    object 
 10  C_v                                        211 non-null    float64
 11  v{power: 1.0}_v                            211 non-null    float64
 12  v{power: 1.0} * u{power: 1.0}_v            211 non-null    float64
 13  u{power: 1.0}_v                            211 non-null    float64
 14  dv/dx1{power: 1.0} * v{power: 1.0}_v       211 non-null    float64
 15  dv/dx1{power: 1.0} * u{power: 1.0}_v       211 non-null    float64
 16  du/dx1{power: 1.0}_v                       211 non-null    float64
 17  dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v  211 non-null    float64
 18  du/dx1{power: 1.0} * u{power: 1.0}_v       211 non-null    float64
 19  dv/dx1{power: 1.0}_r_v                     211 non-null    object 
 20  dv/dx1{power: 1.0} * u{power: 1.0}_r_v     211 non-null    object 
dtypes: float64(18), object(3)
memory usage: 36.3+ KB

2. Initializing Bayesian Network¶

In [16]:
# bamt preprocessing pipeline: label-encode discrete columns, quantile-
# discretize continuous ones for structure learning.
discretizer = preprocessing.KBinsDiscretizer(n_bins=9, encode='ordinal', strategy='quantile') # Empirical parameter tuning
encoder = preprocessing.LabelEncoder()
p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
data, est = p.apply(df)
info_r = p.info  # per-column type/sign metadata consumed by the Bayesian network
In [17]:
info_r
Out[17]:
{'types': {'C_u': 'cont',
  'v{power: 1.0} * u{power: 1.0}_u': 'cont',
  'u{power: 1.0}_u': 'cont',
  'v{power: 1.0}_u': 'cont',
  'v{power: 1.0} * dv/dx1{power: 1.0}_u': 'cont',
  'u{power: 1.0} * du/dx1{power: 1.0}_u': 'cont',
  'dv/dx1{power: 1.0}_u': 'cont',
  'du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u': 'cont',
  'du/dx1{power: 1.0} * v{power: 1.0}_u': 'cont',
  'du/dx1{power: 1.0}_r_u': 'disc',
  'C_v': 'cont',
  'v{power: 1.0}_v': 'cont',
  'v{power: 1.0} * u{power: 1.0}_v': 'cont',
  'u{power: 1.0}_v': 'cont',
  'dv/dx1{power: 1.0} * v{power: 1.0}_v': 'cont',
  'dv/dx1{power: 1.0} * u{power: 1.0}_v': 'cont',
  'du/dx1{power: 1.0}_v': 'cont',
  'dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v': 'cont',
  'du/dx1{power: 1.0} * u{power: 1.0}_v': 'cont',
  'dv/dx1{power: 1.0}_r_v': 'disc',
  'dv/dx1{power: 1.0} * u{power: 1.0}_r_v': 'disc'},
 'signs': {'C_u': 'neg',
  'v{power: 1.0} * u{power: 1.0}_u': 'neg',
  'u{power: 1.0}_u': 'pos',
  'v{power: 1.0}_u': 'neg',
  'v{power: 1.0} * dv/dx1{power: 1.0}_u': 'neg',
  'u{power: 1.0} * du/dx1{power: 1.0}_u': 'pos',
  'dv/dx1{power: 1.0}_u': 'pos',
  'du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u': 'pos',
  'du/dx1{power: 1.0} * v{power: 1.0}_u': 'pos',
  'C_v': 'neg',
  'v{power: 1.0}_v': 'neg',
  'v{power: 1.0} * u{power: 1.0}_v': 'pos',
  'u{power: 1.0}_v': 'pos',
  'dv/dx1{power: 1.0} * v{power: 1.0}_v': 'pos',
  'dv/dx1{power: 1.0} * u{power: 1.0}_v': 'pos',
  'du/dx1{power: 1.0}_v': 'pos',
  'dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v': 'neg',
  'du/dx1{power: 1.0} * u{power: 1.0}_v': 'neg'}}
In [18]:
data.head()
Out[18]:
C_u v{power: 1.0} * u{power: 1.0}_u u{power: 1.0}_u v{power: 1.0}_u v{power: 1.0} * dv/dx1{power: 1.0}_u u{power: 1.0} * du/dx1{power: 1.0}_u dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u du/dx1{power: 1.0} * v{power: 1.0}_u du/dx1{power: 1.0}_r_u ... v{power: 1.0}_v v{power: 1.0} * u{power: 1.0}_v u{power: 1.0}_v dv/dx1{power: 1.0} * v{power: 1.0}_v dv/dx1{power: 1.0} * u{power: 1.0}_v du/dx1{power: 1.0}_v dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v du/dx1{power: 1.0} * u{power: 1.0}_v dv/dx1{power: 1.0}_r_v dv/dx1{power: 1.0} * u{power: 1.0}_r_v
0 2 2 1 1 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 1
1 3 3 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 1
2 2 2 1 1 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 1
3 3 3 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 1
4 2 2 1 1 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 1

5 rows × 21 columns

2.1. Defining the network type¶

There are 3 types of Bayesian networks - DiscreteBN, ContinuousBN, HybridBN.

  • The parameter 'has_logit' is used in the training of the structure and specifically addresses the question: "Is it possible for a continuous node to have discrete 'children'?"
  • The parameter 'use_mixture' answers the question: "Is a Gaussian mixture model being used?"
In [19]:
bn = Nets.HybridBN(has_logit=True, use_mixture=True)
In [20]:
bn.add_nodes(info_r)
In [21]:
bn.get_info()
Out[21]:
name node_type data_type parents parents_types
0 C_u Gaussian cont [] []
1 v{power: 1.0} * u{power: 1.0}_u Gaussian cont [] []
2 u{power: 1.0}_u Gaussian cont [] []
3 v{power: 1.0}_u Gaussian cont [] []
4 v{power: 1.0} * dv/dx1{power: 1.0}_u Gaussian cont [] []
5 u{power: 1.0} * du/dx1{power: 1.0}_u Gaussian cont [] []
6 dv/dx1{power: 1.0}_u Gaussian cont [] []
7 du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u Gaussian cont [] []
8 du/dx1{power: 1.0} * v{power: 1.0}_u Gaussian cont [] []
9 du/dx1{power: 1.0}_r_u Discrete disc [] []
10 C_v Gaussian cont [] []
11 v{power: 1.0}_v Gaussian cont [] []
12 v{power: 1.0} * u{power: 1.0}_v Gaussian cont [] []
13 u{power: 1.0}_v Gaussian cont [] []
14 dv/dx1{power: 1.0} * v{power: 1.0}_v Gaussian cont [] []
15 dv/dx1{power: 1.0} * u{power: 1.0}_v Gaussian cont [] []
16 du/dx1{power: 1.0}_v Gaussian cont [] []
17 dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v Gaussian cont [] []
18 du/dx1{power: 1.0} * u{power: 1.0}_v Gaussian cont [] []
19 dv/dx1{power: 1.0}_r_v Discrete disc [] []
20 dv/dx1{power: 1.0} * u{power: 1.0}_r_v Discrete disc [] []
In [22]:
variable_names = ['u', 'v']
In [23]:
# Pick, for each objective function, the most frequently occurring
# right-hand-side ('_r') marker column; these become the root nodes
# of the structure search.
df_temp = (df_initial[[col for col in df_initial.columns if '_r' in col]] != 0).copy()
print(df_temp.sum(axis=0).sort_values(ascending=False)[:len(variable_names)])

init_nodes_list = []
for _ in variable_names:
    top_node = df_temp.sum(axis=0).idxmax()
    init_nodes_list.append(top_node)
    df_temp = df_temp.drop(columns=top_node)
print(init_nodes_list)

params = {
    "init_nodes": init_nodes_list,
    'init_edges': [('du/dx1{power: 1.0}_r_u', 'v{power: 1.0} * u{power: 1.0}_u'),
                   ('dv/dx1{power: 1.0}_r_v', 'v{power: 1.0} * u{power: 1.0}_v')],
    'remove_init_edges': True,
}
du/dx1{power: 1.0}_r_u    211
dv/dx1{power: 1.0}_r_v    210
dtype: int64
['du/dx1{power: 1.0}_r_u', 'dv/dx1{power: 1.0}_r_v']
In [24]:
bn.add_edges(data, scoring_function=('K2', K2Score), params = params) # Structure Learning
In [25]:
bn.get_info()
Out[25]:
name node_type data_type parents parents_types
0 v{power: 1.0} * dv/dx1{power: 1.0}_u MixtureGaussian cont [] []
1 dv/dx1{power: 1.0}_u MixtureGaussian cont [] []
2 du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u MixtureGaussian cont [] []
3 du/dx1{power: 1.0} * v{power: 1.0}_u MixtureGaussian cont [] []
4 du/dx1{power: 1.0}_r_u Discrete disc [] []
5 dv/dx1{power: 1.0} * u{power: 1.0}_v MixtureGaussian cont [] []
6 du/dx1{power: 1.0}_v MixtureGaussian cont [] []
7 dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v MixtureGaussian cont [] []
8 du/dx1{power: 1.0} * u{power: 1.0}_v MixtureGaussian cont [] []
9 dv/dx1{power: 1.0}_r_v Discrete disc [] []
10 v{power: 1.0} * u{power: 1.0}_u ConditionalMixtureGaussian cont [du/dx1{power: 1.0}_r_u] [disc]
11 dv/dx1{power: 1.0} * u{power: 1.0}_r_v Discrete disc [dv/dx1{power: 1.0}_r_v] [disc]
12 u{power: 1.0}_u MixtureGaussian cont [v{power: 1.0} * u{power: 1.0}_u] [cont]
13 v{power: 1.0}_u MixtureGaussian cont [u{power: 1.0}_u] [cont]
14 C_u MixtureGaussian cont [v{power: 1.0} * u{power: 1.0}_u, u{power: 1.0... [cont, cont]
15 u{power: 1.0} * du/dx1{power: 1.0}_u MixtureGaussian cont [u{power: 1.0}_u, v{power: 1.0}_u] [cont, cont]
16 C_v ConditionalMixtureGaussian cont [C_u, u{power: 1.0}_u, dv/dx1{power: 1.0} * u{... [cont, cont, disc]
17 v{power: 1.0}_v MixtureGaussian cont [C_v] [cont]
18 u{power: 1.0}_v MixtureGaussian cont [C_v] [cont]
19 v{power: 1.0} * u{power: 1.0}_v MixtureGaussian cont [C_v, v{power: 1.0}_v] [cont, cont]
20 dv/dx1{power: 1.0} * v{power: 1.0}_v MixtureGaussian cont [C_v, v{power: 1.0} * u{power: 1.0}_v, u{power... [cont, cont, cont]
In [26]:
bn.plot(f'output_main_hunter_prey.html')
Out[26]:

3. Parameters learning and sampling¶

In [27]:
%%time
bn.fit_parameters(df_initial)
CPU times: total: 18.6 s
Wall time: 5.98 s
In [28]:
def get_objects(synth_data, threshold=0.01):
    """
    Convert sampled coefficient rows into sparse term dictionaries.

    Parameters
    ----------
    synth_data : pd.DataFrame
        The fields in the table are structures of received systems/equations,
        where each record/row contains coefficients at each structure.
    threshold : float, optional
        Coefficients whose absolute value does not exceed this bound are
        treated as absent terms and dropped. Default 0.01 preserves the
        previous hard-coded cut-off.

    Returns
    -------
    objects_result : list of dict
        One dict per row (a candidate equation/system), mapping each retained
        column name to its coefficient.
    """
    objects_result = []
    for i in range(len(synth_data)):
        # Discrete markers are strings such as '-1'; float() handles both
        # numeric and string-encoded values uniformly.
        object_res = {
            col: synth_data[col].values[i]
            for col in synth_data.columns
            if abs(float(synth_data[col].values[i])) > threshold
        }
        objects_result.append(object_res)
    return objects_result
In [29]:
# Keep drawing batches of 200 synthetic systems from the network until
# exactly sample_k of them have been collected.
objects_res = []
sample_k = 30
while len(objects_res) < sample_k:
    synth_data = bn.sample(200, as_df=True)
    remaining = sample_k - len(objects_res)
    # Slicing handles both cases: a batch larger than needed is truncated,
    # a smaller one is appended whole.
    objects_res.extend(get_objects(synth_data)[:remaining])
100%|██████████| 200/200 [00:04<00:00, 45.80it/s]
100%|██████████| 200/200 [00:04<00:00, 47.49it/s]
100%|██████████| 200/200 [00:04<00:00, 47.66it/s]
100%|██████████| 200/200 [00:04<00:00, 47.57it/s]

4. System structure validation¶

In [30]:
# Reference ("ground-truth") terms of the Lotka-Volterra system, used to
# validate the structures of the sampled equations.
list_correct_structures_unique = ['v{power: 1.0} * u{power: 1.0}_v', 'v{power: 1.0}_v', 'dv/dx1{power: 1.0}_r_v',
                          'v{power: 1.0} * u{power: 1.0}_u', 'u{power: 1.0}_u', 'du/dx1{power: 1.0}_r_u']
In [31]:
import re
import itertools

# Expand each reference term into every permutation of its factors so that
# e.g. 'a * b' and 'b * a' are recognized as the same structure. The '_r'
# (right-hand side) and '_u'/'_v' (equation membership) suffixes are peeled
# off first and re-attached after permuting.
list_correct_structures = set()
for term in list_correct_structures_unique:
    suffix_r = '_r' if '_r' in term else ''
    suffix_var = ''
    for var in variable_names:
        marker = f'_{var}'
        if marker in term:
            term = term.replace(marker, '')
            suffix_var = marker
    factors = re.sub('_r', '', term).split(' * ')
    for perm in itertools.permutations(range(len(factors))):
        permuted = ' * '.join(factors[idx] for idx in perm)
        list_correct_structures.add(permuted + suffix_r + suffix_var)
In [32]:
def out_red(text):
    """Print text in red (ANSI), without a trailing newline."""
    print(f"\033[31m {text}", end='')

def out_green(text):
    """Print text in green (ANSI), without a trailing newline."""
    print(f"\033[32m {text}", end='')

met, k_sys = 0, len(objects_res)
k_min = min(k_sys, 5)  # show at most the first five sampled systems

for object_row in objects_res[:k_min]:
    k_c, k_l = 0, 0
    for col in df_initial.columns:
        if col not in object_row:
            continue
        # Green: term belongs to the reference structure set; red: spurious.
        if col in list_correct_structures:
            k_c += 1
            out_green(col)
        else:
            k_l += 1
            out_red(col)
        print(f'\033[0m:{object_row[col]}')
    print(f'correct structures = {k_c}/{len(list_correct_structures_unique)}')
    print(f'incorrect structures = {k_l}')
    print('--------------------------')

# Average number of correct terms per sampled system (over all samples).
for object_row in objects_res:
    met += sum(1 for term in object_row if term in list_correct_structures)

print(f'average value (equation - {k_sys}) = {met / k_sys}')
 C_u:-0.13706544031186782
 v{power: 1.0} * u{power: 1.0}_u:-0.02796083044228613
 u{power: 1.0}_u:0.5257918697164456
 dv/dx1{power: 1.0}_u:0.016114912486107302
 du/dx1{power: 1.0}_r_u:-1.0
 v{power: 1.0}_v:-0.8356804923371564
 v{power: 1.0} * u{power: 1.0}_v:0.025941183828859165
 dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
 C_u:-0.1390545344562649
 v{power: 1.0} * u{power: 1.0}_u:-0.027979225523030306
 u{power: 1.0}_u:0.5397202971530966
 dv/dx1{power: 1.0}_u:0.029346320517112234
 du/dx1{power: 1.0}_r_u:-1.0
 v{power: 1.0}_v:-0.8356853851794231
 v{power: 1.0} * u{power: 1.0}_v:0.025941384620031628
 dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
 C_u:-0.1447921439943488
 v{power: 1.0} * u{power: 1.0}_u:-0.02803228675995169
 u{power: 1.0}_u:0.5798973186059193
 du/dx1{power: 1.0}_r_u:-1.0
 v{power: 1.0}_v:-0.8356994984859185
 v{power: 1.0} * u{power: 1.0}_v:0.025941963791211144
 dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 1
--------------------------
 C_u:-0.14536311767132443
 v{power: 1.0} * u{power: 1.0}_u:-0.02803756710559053
 u{power: 1.0}_u:0.5838955031314497
 dv/dx1{power: 1.0}_u:0.052687451197191856
 du/dx1{power: 1.0}_r_u:-1.0
 v{power: 1.0}_v:-0.8357009031768629
 v{power: 1.0} * u{power: 1.0}_v:0.025942021449693667
 dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
 C_u:-0.1400638508397519
 v{power: 1.0} * u{power: 1.0}_u:-0.02798855965026343
 u{power: 1.0}_u:0.546787931456705
 du/dx1{power: 1.0}_r_u:-1.0
 v{power: 1.0}_v:-0.8356878678029228
 v{power: 1.0} * u{power: 1.0}_v:0.025941486498431917
 dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 1
--------------------------
average value (equation - 30) = 5.333333333333333

==================================

Source: https://github.com/stan-dev/example-models/blob/master/knitr/lotka-volterra/lotka-volterra-predator-prey.Rmd (line 558)

"Using a non-statistically motivated error term and optimization, Howard (2009) provides the following point estimates for the model parameters based on the data."

$\alpha$ = 0.55; $\beta$ = 0.028; $\gamma $ = 0.84; $\delta$ = 0.026

\begin{equation*} \begin{cases} \Large\frac{\partial u}{\partial t} = \normalsize 0.55 \cdot u - 0.028 \cdot u \cdot v, \\ \Large\frac{\partial v}{\partial t} = \normalsize - 0.84 \cdot v + 0.026 \cdot u \cdot v. \end{cases} \end{equation*}

$t \in$ [0, 20], $u_0, \ v_0$ = 30, 4

The equations have periodic solutions. The solution was obtained using scipy.integrate.odeint

In [33]:
# Reference numerical solution of the Lotka-Volterra system (precomputed
# with scipy.integrate.odeint and stored as .npy files, per the markdown above).
t_numerical = np.load(f'{path}t_numerical.npy')
numerical_solution = np.load(f'{path}numerical_solution.npy')
x_numerical = numerical_solution.T[:, 0] # Hare/Prey
y_numerical = numerical_solution.T[:, 1] # Lynx/Hunters
numerical_data = [x_numerical, y_numerical]
In [34]:
t_new = np.linspace(0, int(t_numerical[-1]), len(t)) # Shifting from the interval 1900-1920 to 0-20 for t with the same step size
In [35]:
# Overlay the reference numerical solution (solid) with the rescaled source
# data (dashed) for both populations.
traces = [
    (t_numerical, x_numerical, 'Numerical solution - u (Hare)', dict(color='firebrick', width=2)),
    (t_numerical, y_numerical, 'Numerical solution - v (Lynx)', dict(color='black', width=2)),
    (t_new, y, 'Source data - u (Hare)', dict(color='firebrick', dash='dash', width=2)),
    (t_new, x, 'Source data - v (Lynx)', dict(color='black', dash='dash', width=2)),
]

fig = go.Figure()
for xs, ys, label, line_style in traces:
    fig.add_trace(go.Scatter(x=xs, y=ys, name=label, line=line_style))

fig.update_layout(title="Hunter-prey",
                  xaxis_title="Time",
                  yaxis_title="Population")

fig.show()

5. Equation Solution¶

The 30 systems generated/sampled from the Bayesian network are solved in order to estimate the uncertainty in the data domain.

In [36]:
# Maximum derivative order used during equation discovery: 1 or 2.
max_deriv_order = 1

# NOTE: torch.load and pickle.load can execute arbitrary code while
# unpickling — only load these files from a trusted source.
if max_deriv_order == 1:
    set_solutions = torch.load(f'{path}file_u_main_first_derivative.pt')
    with open(f'{path}data_equations_30_first.pickle', 'rb') as f:
        equations = pickle.load(f)
    param = [np.linspace(0, 11, 201)]
    # Indices of solutions rejected by visual/expert inspection (see section 6).
    list_incor_sol = [5, 6, 13, 20, 26, 29]
else:
    set_solutions = torch.load(f'{path}file_u_main_second_derivative.pt')
    with open(f'{path}data_equations_30_second.pickle', 'rb') as f:
        equations = pickle.load(f)
    param = [np.linspace(0, 11, 1001)]
    list_incor_sol = [3, 4, 8, 14]
In [37]:
variable_names = ['u', 'v'] # list of objective function names (NOTE(review): re-declares the same value as in the structure-learning section — consider removing one)
grid = grid_format_prepare(param, "mat")  # time grid in TEDEouS 'mat' format
In [38]:
num_steps = len(set_solutions)
# Base figure: the u and v components of the first sampled system's solution.
fig = go.Figure(data=[go.Scatter(x = grid.reshape(-1), y=set_solutions[0][:, 0], name='u_autograd'),
                      go.Scatter(x = grid.reshape(-1), y=set_solutions[0][:, 1], name='v_autograd')])

# One animation frame per sampled system, keyed by its index.
frames=[]
for i in range(0, len(set_solutions)):
    frames.append(go.Frame(name=str(i),
                           data=[go.Scatter(x=grid.reshape(-1), y=set_solutions[i, :, 0], name='u_autograd'),
                                 go.Scatter(x=grid.reshape(-1),  y=set_solutions[i, :, 1], name='v_autograd')]))

# Slider: one step per frame, labelled with the system index.
steps = []
for i in range(num_steps):

    step = dict(
        label = f'{i}',
        method = "animate",
        args = [[str(i)]]
    )
    steps.append(step)

sliders = [dict(
    # "Уравнение №" is a user-facing label ("Equation No."), kept as-is.
    currentvalue = {"prefix": "Уравнение №", "font": {"size": 14}},
    len = 1,
    x = 0,
    pad = {"b": 10, "t": 50},
    steps = steps,
)]

# In the general case the title should be set explicitly
fig.update_layout(title="Hunters - Prey",
                  xaxis_title="Time t",
                  yaxis_title="Population",
                 legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99
                            ))


fig.layout.sliders = sliders
fig.frames = frames

fig.show()
In [39]:
def token_check_obj(columns, object_row, correct_structures=None):
    """Count correct and incorrect equation structures present in one object.

    Args:
        columns: iterable of structure (token) names to check.
        object_row: mapping/container of structures found in one sampled object;
            membership (`col in object_row`) decides presence.
        correct_structures: collection of structures considered correct.
            Defaults to the module-level ``list_correct_structures_unique``
            (backward compatible with the original behavior).

    Returns:
        tuple (k_c, k_l): number of correct and incorrect structures found.
    """
    if correct_structures is None:
        correct_structures = list_correct_structures_unique
    k_c, k_l = 0, 0
    for col in columns:
        if col in object_row:
            if col in correct_structures:
                k_c += 1
            else:
                k_l += 1
    return k_c, k_l

def plot_token_check_obj(columns, object_row, correct_structures=None):
    """Print each structure found in `object_row`, colored by correctness.

    Correct structures are printed via ``out_green``, incorrect via
    ``out_red``; each is followed by its value from `object_row`.
    A summary of the counts is printed at the end.

    Args:
        columns: iterable of structure (token) names to check.
        object_row: mapping of structures found in one sampled object.
        correct_structures: collection of structures considered correct.
            Defaults to the module-level ``list_correct_structures_unique``
            (backward compatible with the original behavior).
    """
    if correct_structures is None:
        correct_structures = list_correct_structures_unique
    k_c, k_l = 0, 0
    for col in columns:
        if col in object_row:
            if col in correct_structures:
                k_c += 1
                out_green(f'{col}')
                print(f'\033[0m:{object_row[col]}')  # reset ANSI color, then value
            else:
                k_l += 1
                out_red(f'{col}')
                print(f'\033[0m:{object_row[col]}')
    # NOTE(review): 6 is presumably the number of terms in the true
    # Lotka-Volterra system — confirm against list_correct_structures_unique.
    print(f'correct structures = {k_c}/{6}')
    print(f'incorrect structures = {k_l}')

After the solutions are obtained, they can be inspected visually and those that clearly do not fit can be excluded (expert knowledge). Given the nature of the studied process — predator–prey interaction — valid solutions can neither enter the negative region (populations are non-negative) nor remain constant.

6. The confidence interval¶

The confidence interval for the mean with unknown variance is calculated using the formula:

$$(\overline{x} - \frac{S}{\sqrt{n}} C_{\frac{\alpha}{2}}; \overline{x} + \frac{S}{\sqrt{n}}C_{\frac{\alpha}{2}})$$

where: $\overline{x}$ — the sample mean; $S$ — the sample standard deviation; $n$ — the sample size; $C_{\frac{\alpha}{2}}$ — the quantile of the corresponding distribution (below, the normal quantile $1.96$ is used for $\alpha = 0.05$); $\frac{S}{\sqrt{n}} C_{\frac{\alpha}{2}}$ — the maximum estimation error.

In [40]:
# Drop solutions rejected by expert inspection, keeping the rest in order.
# (Replaces the manual grow-a-list loop with an equivalent comprehension.)
set_solutions_new = [sol for i, sol in enumerate(set_solutions)
                     if i not in list_incor_sol]

set_solutions_new = np.array(set_solutions_new)
temp = set_solutions.copy()              # keep the unfiltered set for reference
set_solutions = set_solutions_new.copy()
In [41]:
# Per-(time-point, variable) statistics across the retained sampled solutions.
# set_solutions is (n_samples, n_points, n_vars); stats collapse the first axis.
_, n_points, n_vars = set_solutions.shape
mean_arr = np.zeros((n_points, n_vars))
var_arr = np.zeros((n_points, n_vars))
s_arr = np.zeros((n_points, n_vars))  # sample standard deviation of data
In [42]:
# Vectorized over all (time-point, variable) cells: reducing along axis 0
# (the sample axis) replaces the original O(n_points * n_vars) Python loops
# with identical results.
mean_arr = np.mean(set_solutions, axis=0)
var_arr = np.var(set_solutions, axis=0)
s_arr = np.std(set_solutions, axis=0, ddof=1)  # sample std, same as statistics.stdev()
In [43]:
# Move the statistics into torch tensors for downstream tensor arithmetic.
# NOTE: `s_arr` is rebound from ndarray to tensor — the same name is used below.
mean_tens, var_tens, s_arr = (torch.from_numpy(arr)
                              for arr in (mean_arr, var_arr, s_arr))
In [44]:
# Confidence region for the mean: mean ± C_{alpha/2} * S / sqrt(n).
# NOTE(review): 1.96 is the two-sided 95% quantile of the standard normal;
# with ~24 retained samples a Student-t quantile (~2.07) may be more
# appropriate — confirm.
Z_95 = 1.96  # was an inline magic number
margin = Z_95 * s_arr / math.sqrt(len(set_solutions))
upper_bound = mean_tens + margin
lower_bound = mean_tens - margin
In [45]:
# Build a closed polygon for plotly's filled band: the upper bound traversed
# forward, then the lower bound traversed in reverse (same trick for the grid).
lower_reversed = torch.flip(lower_bound, dims=(0,))
confidence_region = torch.cat((upper_bound, lower_reversed), dim=0)
flat_grid = grid.reshape(-1)
confidence_grid = torch.cat((flat_grid, torch.flip(flat_grid, dims=(0,))), dim=0)
In [46]:
# case: if dimensionality = 0 - [t, ] and (variable_names = [u, v] or [u, v, ..]
fig = go.Figure()

# NOTE(review): the loop variable was previously named `param`, silently
# shadowing the grid-definition list `param` set earlier; renamed to avoid it.
for n, var_name in enumerate(variable_names):
    # Random RGB per variable; not seeded, so colors vary between runs.
    color = list(np.random.choice(range(256), size=3))

    fig.add_trace(go.Scatter(x=t_numerical, y=numerical_data[n].reshape(-1),
                             name=f'Initial data - {var_name}',
                             line=dict(color='firebrick', width=4)))

    fig.add_trace(go.Scatter(x=grid.reshape(-1), y=mean_tens[:, n],
                             name=f'Solution field (mean) - {var_name}',
                             line_color=f'rgb({color[0]},{color[1]},{color[2]})',
                             line=dict(dash='dash')))

    # Filled band over the closed upper/lower-bound polygon built above.
    fig.add_trace(go.Scatter(x=confidence_grid, y=confidence_region[:, n],
                             fill='toself',
                             fillcolor=f'rgba({color[0]},{color[1]},{color[2]},0.2)',
                             line_color='rgba(255,255,255,0)',
                             name=f'Confidence region - {var_name}'))

fig.update_layout(title="hunter_prey",
                  xaxis=dict(range=[0,11]),
                  xaxis_title="Time",
                  yaxis_title="Population")

fig.show()